In [56]:
# Connecting to Google drive: mount it into the Colab VM at /content/drive.
# NOTE(review): a later cell mounts Drive again at /gdrive and all data paths
# below use that mountpoint, so this first mount appears redundant — confirm.

from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive

Install and import dependencies

In [57]:
pip install ts2vg
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Requirement already satisfied: ts2vg in /usr/local/lib/python3.8/dist-packages (1.0.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from ts2vg) (1.21.6)
In [58]:
!pip install igraph
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Requirement already satisfied: igraph in /usr/local/lib/python3.8/dist-packages (0.10.2)
Requirement already satisfied: texttable>=1.6.2 in /usr/local/lib/python3.8/dist-packages (from igraph) (1.6.7)
In [59]:
import pandas as pd
from glob import glob
import os
from ts2vg import NaturalVG
import numpy as np
from ts2vg import HorizontalVG
In [60]:
# Mount your Google Drive at /gdrive — every data path below is rooted here.
# NOTE(review): relies on `drive` imported in the first cell of this notebook.
drive.mount('/gdrive')
Mounted at /gdrive

Folder structure

Task 1

  1. Apply natural visibility graph (NVG) and horizontal visibility graph (HVG) to the aforementioned data
  2. Compute average degree, network diameter, and average path length
  3. For the above computations select sample size of 1024 data points ( from 1000 to 2024) for each of the 15 time series
  4. Tabulate all the results
  5. Generate scatter plots: average degree vs network diameter and color the points according to walking and running (do this for each accelerometer signal and each method (HVG and NVG))
  6. Generate scatter plots: average degree vs network diameter and color the points according to climbing up and climbing down (do this for each accelerometer signal and each method (HVG and NVG))
In [61]:
# A code block to loop through the above folder structure and generate HVG and
# NVG network metrics for each subject / session / accelerometer axis.
# Produces `result`: result[0] = HVG table, result[1] = NVG table (cells below
# rely on this order).
BASE_DIR = "/gdrive/MyDrive/Project_2_IE5374"
SAMPLE_SLICE = slice(1000, 2024)  # 1024 data points, as required by Task 1.3

result = []
for method in ['HVG', 'NVG']:
    rows = []
    for i in range(1, 16):
        # os.path.join instead of "/" + p + "/": the old concatenation produced
        # double-slash paths ("//gdrive/...") because p is already absolute.
        subject_dir = os.path.join(BASE_DIR, "Subject " + str(i))
        for session in os.listdir(subject_dir):
            session_dir = os.path.join(subject_dir, session)
            for csv_name in os.listdir(session_dir):
                # Skip the documentation file shipped with each session folder.
                if 'readMe' in csv_name:
                    continue
                parts = csv_name.split('_')
                # File names come in two variants; when a '2' token is present
                # the sensor-area token sits one position further right.
                area_idx = 3 if '2' in parts else 2
                data = pd.read_csv(os.path.join(session_dir, csv_name))
                for axis in ['x', 'y', 'z']:
                    series = data['attr_' + axis].iloc[SAMPLE_SLICE]
                    g = NaturalVG() if method == 'NVG' else HorizontalVG()
                    g.build(series)
                    ig_g = g.as_igraph()
                    # One record per (subject, session, axis); dict order fixes
                    # the column order of the resulting DataFrame.
                    rows.append({
                        'Subject': "Subject " + str(i),
                        'Activity': parts[1],
                        'Attribute': axis,
                        'Sensor_Area': parts[area_idx].split('.')[0],
                        'Nodes': ig_g.vcount(),
                        'Number_of_Links': ig_g.ecount(),
                        'Average_Degree': np.mean(ig_g.degree()),
                        'Network_Diameter': ig_g.diameter(),
                        'Average_Path_Length': ig_g.average_path_length(),
                        'Method': method,
                    })
    # Build each method's table once from the collected records instead of
    # assembling nine parallel lists column by column.
    result.append(pd.DataFrame(rows))
In [62]:
# HVG data table — preview the first rows of result[0] (the HVG metrics)
result[0].head(5)
Out[62]:
Subject Activity Attribute Sensor_Area Nodes Number_of_Links Average_Degree Network_Diameter Average_Path_Length Method
0 Subject 1 running x chest 1024 2029 3.962891 22 7.543843 HVG
1 Subject 1 running y chest 1024 1964 3.835938 90 31.634922 HVG
2 Subject 1 running z chest 1024 2030 3.964844 19 8.726860 HVG
3 Subject 1 running x forearm 1024 2012 3.929688 39 15.741542 HVG
4 Subject 1 running y forearm 1024 1895 3.701172 33 13.217918 HVG
In [63]:
# NVG data table — preview the first rows of result[1] (the NVG metrics)
result[1].head(5)
Out[63]:
Subject Activity Attribute Sensor_Area Nodes Number_of_Links Average_Degree Network_Diameter Average_Path_Length Method
0 Subject 1 running x chest 1024 4308 8.414062 9 4.307645 NVG
1 Subject 1 running y chest 1024 5702 11.136719 82 28.651949 NVG
2 Subject 1 running z chest 1024 4259 8.318359 11 4.644369 NVG
3 Subject 1 running x forearm 1024 8300 16.210938 11 4.509737 NVG
4 Subject 1 running y forearm 1024 5999 11.716797 13 4.893262 NVG
In [64]:
# Function to plot Scatter plots
import seaborn as sns
import matplotlib.pyplot as plt

def Scatter_Plot_TaskOne(activity, dataframe):
    '''Scatter plots of Average_Degree vs Network_Diameter, coloured by activity.
    One plot is drawn per (sensor area, accelerometer axis) combination.
    ARGS: activity  = list of activity labels to keep (e.g. ['walking', 'running'])
          dataframe = one of the Task-1 result tables (HVG or NVG)'''
    df_temp = dataframe.loc[dataframe['Activity'].isin(activity), :]
    for q in df_temp['Sensor_Area'].unique():
        df_q = df_temp.loc[df_temp['Sensor_Area'] == q]
        for j in ['x', 'y', 'z']:
            # FIX: build the mask from df_q itself; the original indexed df_q
            # with a mask built from df_temp, which only worked through
            # pandas index alignment and breaks if the index is not unique.
            df_p = df_q.loc[df_q['Attribute'] == j]
            ax_dim = sns.scatterplot(data=df_p, x="Average_Degree",
                                     y="Network_Diameter", hue="Activity")
            ax_dim.set_title('Attribute:' + j
                             + ' Method:' + (df_p['Method'].unique())[0]
                             + ' Sensor:' + q)
            plt.show()
In [65]:
# Scatter plot for HVG walking and running for attributes x, y and z (result[0] = HVG table)
Scatter_Plot_TaskOne(['walking','running'], result[0])
In [66]:
# Scatter plot for HVG climbing up and climbing down for attributes x, y and z
Scatter_Plot_TaskOne(['climbingup','climbingdown'], result[0])
In [67]:
# Scatter plot for NVG walking and running for attributes x, y and z (result[1] = NVG table)
Scatter_Plot_TaskOne(['walking','running'], result[1])
In [68]:
# Scatter plot for NVG climbing up and climbing down for attributes x, y and z
# (comment previously said HVG, but result[1] is the NVG table)
Scatter_Plot_TaskOne(['climbingup','climbingdown'], result[1])

Task 2

  1. Compute permutation entropy and complexity for the aforementioned data. Consider the accelerometer data in all three directions
  2. Vary the following parameters Embedded Dimension 3, 4, 5, 6 Embedded Delay 1, 2, 3 Signal length 1024, 2048, 4096
  3. Generate scatter plots: permutation entropy vs complexity and color the points according to walking and running (for signal length =4096, embedded delay = 1, and embedded dimension = 3, 4, 5, 6, and all three accelerometer directions)
  4. Generate scatter plots: permutation entropy vs complexity and color the points according to climbing up and climbing down (for signal length =4096, embedded delay = 1, and embedded dimension = 3, 4, 5, 6, all three accelerometer directions)
In [69]:
''' This module has essential functions supporting
fast and effective computation of permutation entropy and
its different variations.'''
import numpy as np


def s_entropy(freq_list):
    '''This function computes the Shannon entropy of a given frequency distribution.
    USAGE: s_entropy(freq_list)
    ARGS: freq_list = Numeric vector representing the frequency distribution
    OUTPUT: A numeric value representing Shannon's entropy (natural log base)'''
    # Vectorised: drop zero frequencies (lim p->0 of p*log p is 0), then
    # compute -sum(p * ln p) in one numpy call instead of a Python loop.
    freqs = np.asarray(freq_list, dtype=float)
    freqs = freqs[freqs != 0]
    if freqs.size == 0:
        return 0.0
    return float(-np.sum(freqs * np.log(freqs)))

def ordinal_patterns(ts, embdim, embdelay):
    '''This function computes the ordinal patterns of a time series for a given
    embedding dimension and embedding delay.
    USAGE: ordinal_patterns(ts, embdim, embdelay)
    ARGS: ts = Numeric vector representing the time series,
          embdim = embedding dimension (3 <= embdim <= 7 preferred range),
          embdelay = embedding delay
    OUTPUT: A list with the non-zero frequencies of the ordinal patterns'''
    from math import factorial  # the np.math alias was removed in NumPy >= 1.25

    m, t = embdim, embdelay
    x = ts if isinstance(ts, np.ndarray) else np.array(ts)

    # Delay-embedding matrix: column i holds the series rolled by i*t.
    tmp = np.zeros((x.shape[0], m))
    for i in range(m):
        tmp[:, i] = np.roll(x, i * t)
    # Drop the first (t*m - 1) rows, which contain wrapped-around values.
    partition = tmp[(t * m - 1):, :]
    permutation = np.argsort(partition)
    idx = _hash(permutation)

    # Count how often each of the m! possible patterns occurs.
    counts = np.zeros(factorial(m))
    for i in range(counts.shape[0]):
        counts[i] = (idx == i).sum()
    return list(counts[counts != 0].astype(int))

def _hash(x):
    '''Rank each ordinal-pattern row of `x` to a unique integer in [0, n!)
    via a recursive factorial-base ranking (count of later columns smaller
    than the first column, weighted by (n-1)!).'''
    from math import factorial  # the np.math alias was removed in NumPy >= 1.25

    m, n = x.shape
    if n == 1:
        return np.zeros(m)
    return (np.sum(np.apply_along_axis(lambda y: y < x[:, 0], 0, x), axis=1)
            * factorial(n - 1) + _hash(x[:, 1:]))
    

def p_entropy(op):
    '''Normalised permutation entropy of an ordinal-pattern frequency vector.
    ARGS: op = list of (non-zero) ordinal-pattern frequencies
    OUTPUT: Shannon entropy of the pattern distribution divided by its maximum
    possible value log(len(op)); lies in [0, 1].'''
    ordinal_pat = op
    # Degenerate case: a single observed pattern has zero entropy. The original
    # code divided by log(1) == 0 here and produced nan (with a warning).
    if len(ordinal_pat) < 2:
        return 0.0
    max_entropy = np.log(len(ordinal_pat))
    p = np.divide(np.array(ordinal_pat), float(sum(ordinal_pat)))
    return s_entropy(p) / max_entropy

def complexity(op):
    '''This function computes the statistical complexity of a time series, defined as
    Comp_JS = Q_o * JSdivergence * pe, where
    Q_o          = normalising constant,
    JSdivergence = Jensen-Shannon divergence between the observed pattern
                   distribution and the uniform distribution,
    pe           = normalised permutation entropy.
    ARGS: op = list of (non-zero) ordinal-pattern frequencies
    OUTPUT: A numeric complexity value'''
    n = len(op)
    # Degenerate case: with a single pattern the normalising constant below is
    # -1/0; entropy and divergence are both zero, so the complexity is 0.
    if n < 2:
        return 0.0
    pe = p_entropy(op)
    # Q_o normalises the JS divergence by its maximum possible value.
    constant1 = (0.5 + ((1 - 0.5) / n)) * np.log(0.5 + ((1 - 0.5) / n))
    constant2 = ((1 - 0.5) / n) * np.log((1 - 0.5) / n) * (n - 1)
    constant3 = 0.5 * np.log(n)
    Q_o = -1 / (constant1 + constant2 + constant3)

    # JS divergence between the observed distribution and the uniform one.
    temp_op_prob = np.divide(op, sum(op))
    temp_op_prob2 = (0.5 * temp_op_prob) + (0.5 * (1 / n))
    JSdivergence = (s_entropy(temp_op_prob2)
                    - 0.5 * s_entropy(temp_op_prob)
                    - 0.5 * np.log(n))
    return Q_o * JSdivergence * pe

def weighted_ordinal_patterns(ts, embdim, embdelay):
    '''Weighted ordinal patterns: like ordinal_patterns, but each occurrence of
    a pattern contributes the variance of its embedding window instead of 1.
    ARGS: ts = Numeric vector representing the time series,
          embdim = embedding dimension, embdelay = embedding delay
    OUTPUT: A list with the non-zero accumulated weights per ordinal pattern'''
    from math import factorial  # the np.math alias was removed in NumPy >= 1.25

    m, t = embdim, embdelay
    x = ts if isinstance(ts, np.ndarray) else np.array(ts)

    tmp = np.zeros((x.shape[0], m))
    for i in range(m):
        tmp[:, i] = np.roll(x, i * t)
    partition = tmp[(t * m - 1):, :]
    # Weight of each window = variance of the values inside the window.
    xm = np.mean(partition, axis=1)
    weight = np.mean((partition - xm[:, None]) ** 2, axis=1)
    permutation = np.argsort(partition)
    idx = _hash(permutation)
    counts = np.zeros(factorial(m))
    for i in range(counts.shape[0]):
        counts[i] = sum(weight[idx == i])

    return list(counts[counts != 0])
In [71]:
# A code block to loop through the above folder structure and generate
# permutation entropy and complexity for every combination of signal length,
# embedded delay and embedded dimension (Task 2.1 / 2.2).
# Produces `df_task_two`, used by the plotting cells below.

embedded_dimensions = [3, 4, 5, 6]
embedded_dela = [1, 2, 3]
signal_len = [1024, 2048, 4096]
records = []
for i in range(1, 16):
    # os.path.join instead of "/" + p + "/": the old concatenation produced
    # double-slash paths ("//gdrive/...") because the base path is absolute.
    subject_dir = os.path.join("/gdrive/MyDrive/Project_2_IE5374", "Subject " + str(i))
    for session in os.listdir(subject_dir):
        session_dir = os.path.join(subject_dir, session)
        for csv_name in os.listdir(session_dir):
            # Skip the documentation file shipped with each session folder.
            if 'readMe' in csv_name:
                continue
            parts = csv_name.split('_')
            # Sensor-area token position depends on the file-name variant.
            area_idx = 3 if '2' in parts else 2
            data = pd.read_csv(os.path.join(session_dir, csv_name))
            for axis in ['x', 'y', 'z']:
                signal = data['attr_' + axis]
                for lenn in signal_len:
                    for delay in embedded_dela:
                        for dim in embedded_dimensions:
                            # Window always starts at sample 1000, as in Task 1.
                            op = ordinal_patterns(signal.iloc[1000:(1000 + lenn)],
                                                  int(dim), int(delay))
                            records.append({
                                'Activity': parts[1],
                                'Attribute': axis,
                                'Sensor_Area': parts[area_idx].split('.')[0],
                                'Subject': "Subject " + str(i),
                                'Signal_Length': lenn,
                                'Embedded_Delay': delay,
                                'Embedded_Dimensions': dim,
                                'Permutation_Entropy': p_entropy(op),
                                'Complexity': complexity(op),
                            })
            # (the original had a dead `else: continue` on the axis loop here)

# Build the table once from the collected records instead of assembling
# nine parallel lists column by column.
df_task_two = pd.DataFrame(records)
In [72]:
# Preview the first rows of the Task-2 entropy/complexity table
df_task_two.head(5)
Out[72]:
Activity Attribute Sensor_Area Subject Signal_Length Embedded_Delay Embedded_Dimensions Permutation_Entropy Complexity
0 running x chest Subject 1 1024 1 3 0.900066 0.086750
1 running x chest Subject 1 1024 1 4 0.850678 0.165682
2 running x chest Subject 1 1024 1 5 0.860076 0.191229
3 running x chest Subject 1 1024 1 6 0.907348 0.149049
4 running x chest Subject 1 1024 2 3 0.977626 0.022381
In [73]:
import seaborn as sns
import matplotlib.pyplot as plt

def Scatter_Plot(signal_length, delay, dimensions, activity, dataframe):
    '''Permutation entropy vs complexity scatter plots, coloured by activity.
    One plot is drawn per (accelerometer axis, sensor area, embedded dimension).
    ARGS: signal_length = signal length to plot (e.g. 4096)
          delay         = embedded delay to plot (e.g. 1)
          dimensions    = list of embedded dimensions to plot
          activity      = list of activity labels to keep
          dataframe     = the Task-2 results table (df_task_two)'''
    df_filtered = dataframe.loc[dataframe['Activity'].isin(activity), :]
    for j in ['x', 'y', 'z']:
        df_j = df_filtered.loc[df_filtered['Attribute'] == j]
        for q in df_j['Sensor_Area'].unique():
            # BUG FIX: filter the axis-specific frame df_j. The original
            # filtered df_filtered here, silently discarding the Attribute
            # filter, so every plot mixed x, y and z data while its title
            # claimed a single axis.
            df_t = df_j.loc[df_j['Sensor_Area'] == q]
            for dim in dimensions:
                subset = df_t[(df_t['Signal_Length'] == signal_length)
                              & (df_t['Embedded_Delay'] == delay)
                              & (df_t['Embedded_Dimensions'] == dim)]
                ax_dim = sns.scatterplot(data=subset, x="Permutation_Entropy",
                                         y="Complexity", hue="Activity")
                ax_dim.set_title('Attribute:' + j + ' Dimension:' + str(dim) + ';'
                                 + ' Signal Length:' + str(signal_length) + ';'
                                 + ' Embedded Delay:' + str(delay)
                                 + ' Sensor Area:' + q)
                plt.show()
In [74]:
# PE vs complexity: walking vs running (signal length 4096, delay 1, dims 3-6)
Scatter_Plot(4096, 1, [3, 4, 5, 6], ['walking', 'running'], df_task_two)
In [ ]:
# PE vs complexity: climbing up vs climbing down (signal length 4096, delay 1, dims 3-6)
Scatter_Plot(4096, 1, [3, 4, 5, 6], ['climbingup', 'climbingdown'], df_task_two)
In [55]:
# Export this notebook to a standalone HTML file for submission
!jupyter nbconvert --to html Project_2_IE5374-4.ipynb
[NbConvertApp] Converting notebook Project_2_IE5374-4.ipynb to html
[NbConvertApp] Writing 7425917 bytes to Project_2_IE5374-4.html